#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import random
import string
import json

from bs4 import BeautifulSoup
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import Spider, Request, Rule
from ..items import UrlItem

class BookSubjectSpider(Spider):
    """Collect links to "涨停板复盘" (limit-up review) topics on taoguba.com.cn.

    Crawls pages 1-15 of user 511605's topic listing, extracts every anchor
    whose text matches the target phrase, and yields each matching link as a
    ``UrlItem``. All collected URLs are also kept in ``urls_res`` and dumped
    when the spider closes.
    """

    name = 'taoguba_link_yj'
    # Listing pages 1..15 for the target user, sorted by topic ("sortFlag=T").
    start_urls = [('https://www.taoguba.com.cn/moreTopic?pageNo=%s&sortFlag=T&userID=511605' % i) for i in range(1, 16)]
    # Accumulated topic URLs (class attribute: shared for the spider's lifetime).
    urls_res = []

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101 Firefox/59.0"}
    # Session cookies captured from a logged-in browser session; the site
    # serves the listing only to an authenticated session.
    cookies = {
        'JSESSIONID': '341e262c-8ec7-4804-9adf-acdad4a31240',
        'UM_distinctid': '16ddd47cb3e38a-0b75361baa3dd9-b363e65-161012-16ddd47cb3f38c',
        'Hm_lvt_cc6a63a887a7d811c92b7cc41c441837': '1571375533', 'tgbuser': '3592025',
        'tgbpwd': 'E66BE235684br5mgcddtpr7zth', 'onedayyszc': '1571414400000',
        'CNZZDATA1574657': 'cnzz_eid%3D1674106506-1571373394-%26ntime%3D1571627532',
        'Hm_lpvt_cc6a63a887a7d811c92b7cc41c441837': '1571631277'
    }

    def start_requests(self):
        """Issue one request per listing page with the session cookies.

        Fix: the custom ``headers`` dict (browser User-Agent) was defined but
        never sent; it is now attached to every request alongside the cookies.
        """
        for url in self.start_urls:
            yield Request(url, callback=self.parse,
                          headers=self.headers, cookies=self.cookies)

    def parse(self, response):
        """Extract matching topic links from one listing page.

        Yields:
            UrlItem: one item per anchor whose text contains "涨停板复盘",
            with ``url`` set to the absolute topic URL.
        """
        if 302 == response.status:
            # A redirect here usually means the session cookies have expired.
            print(response.url)
        else:
            # Name the parser explicitly: avoids bs4's "no parser specified"
            # warning and keeps results consistent across environments.
            soup = BeautifulSoup(response.text, 'html.parser')
            tds = soup.select("td[class='suh']")
            for td in tds:
                # find_all is the non-deprecated spelling of findAll.
                for a in td.find_all("a"):
                    # a.string is None for anchors with nested markup; guard
                    # it so re.search() cannot raise TypeError.
                    text = a.string
                    if text and re.search("涨停板复盘", text):
                        print(text)
                        n_p = 'https://www.taoguba.com.cn/%s' % a.get('href')
                        o_url_item = UrlItem()
                        o_url_item['url'] = n_p
                        self.urls_res.append(n_p)
                        yield o_url_item

    def closed(self, reason):
        """Scrapy close hook: dump every URL collected during the crawl."""
        print(self.urls_res)
